# clear environment
rm(list=ls())

# load packages
library(openxlsx)
library(ggplot2)

# load data set with organization keyword counts and crime numbers
dat = openxlsx::read.xlsx("vsberichte_metadata.xlsx", sheet = 1)

# binary varibable indicating whether interior minister is from a center-right party
dat$cr_intmin <- ifelse(dat$intmin_party == "cdu" | dat$intmin_party== "csu" | dat$intmin_party=="fdp", 1, ifelse(dat$intmin_party=="pro", NA, 0))

# remove those without center-right or center-left interior minister
dat <- dat[!is.na(dat$cr_intmin),]

# logged ratio of organization keywords in chapters on right-wing extremism and left-wing extremism
dat$ratio_log_orga <- log(dat$count_orga_kw_right_chapter + 0.5) - log(dat$count_orga_kw_left_chapter + 0.5)

# ratio in number of words
dat$rwe_lwe_words_ratio <- dat$count_words_right_chapter / dat$count_words_left_chapter

# ratio in crimes
dat$rwe_lwe_crime_ratio <- dat$extreme_crime_right / dat$extreme_crime_left

# create decade indicator
dat$decade <- NA
dat$decade[dat$year<1960]<-1950
dat$decade[dat$year>=1960 & dat$year<1970]<-1960
dat$decade[dat$year>=1970 & dat$year<1980]<-1970
dat$decade[dat$year>=1980 & dat$year<1990]<-1980
dat$decade[dat$year>=1990 & dat$year<2000]<-1990
dat$decade[dat$year>=2000 & dat$year<2010]<-2000
dat$decade[dat$year>=2010 & dat$year<2020]<-2010
dat$decade[dat$year>=2020 & dat$year<2030]<-2020
dat <- dat[dat$year>1950,]

# merge in polling data
polbar <- readstata13::read.dta13("polbar_agg.dta")
polbar$jurisdiction <- tolower(polbar$jurisdiction)
dat$jurisdiction <- gsub("ü", "u", dat$jurisdiction)
dat <- merge(dat, polbar, by.x=c("jurisdiction", "year_pub"), by.y=c("jurisdiction", "year"), all.x=T)

# imputation
dat_mice <- dat[!is.na(dat$ratio_log_orga) & !is.na(dat$cr_intmin),
                c("ratio_log_orga", "rwe_lwe_crime_ratio", "cr_intmin", "jurisdiction", "year",
                  "pmk_right", "pmk_left", "extreme_crime_violent_right", "extreme_crime_violent_left",
                  "person_right", "person_left", "vshead_party", "extreme_crime_islamist", "person_violent_islamist",
                  "extreme_crime_left", "extreme_crime_right", "rwe_lwe_words_ratio", "polbar_fr_vote_pct", "elec_fr_vote_pct")]
dat_imputed <- mice::mice(dat_mice, m=20, maxit = 50, method = 'pmm', seed = 500)

# regression models
mod_1<-lm(ratio_log_orga~factor(cr_intmin), data=dat)
mod_2<-lm(ratio_log_orga~factor(cr_intmin)+factor(jurisdiction), data=dat)
mod_3<-lm(ratio_log_orga~factor(cr_intmin)+factor(jurisdiction)+factor(decade), data=dat)
mod_4<-lm(ratio_log_orga~factor(cr_intmin)+factor(jurisdiction)+factor(year), data=dat)
mod_5<-lm(ratio_log_orga~factor(cr_intmin)+rwe_lwe_words_ratio+factor(jurisdiction)+factor(year), data=dat)
mod_6<-mice::pool(with(dat_imputed, exp=lm(ratio_log_orga~factor(cr_intmin)+rwe_lwe_words_ratio+rwe_lwe_crime_ratio+factor(jurisdiction)+factor(year))))


## coefficient plot

# get estimates
mod_1_est = summary(mod_1)$coefficients[2,1:2]
mod_2_est = summary(mod_6)[2,2:3]

# harmonize and combine
names(mod_1_est) = c("coef", "se")
names(mod_2_est) = c("coef", "se")
ests_df = rbind.data.frame(mod_1_est, mod_2_est)

# create model variable
ests_df$model = c("w/o Controls", "w/ Controls") 
ests_df$model = factor(ests_df$model, levels=c("w/o Controls", "w/ Controls"))

# create plot
ggplot(data=ests_df, aes(x=model, y=coef)) + 
  geom_errorbar(aes(ymax=coef+1.96*se, ymin=coef-1.96*se), width=0, position = position_dodge(width = 0.5)) +
  geom_point(position=position_dodge(width=.5), size=2.5) + 
  theme_classic() +
  #  theme_bw() +
  theme(panel.grid = element_blank()) +
  geom_hline(yintercept = 0, linetype="dashed") +
  theme(legend.position = "bottom", text = element_text(size=18)) +
  xlab("") + ylab("Coefficient of Center-Right\nInterior Minister") +
  scale_shape_manual(values=c(16, 17), name="")

ggsave("Fig4_4.pdf", width = 7, height=5)

